Chapter 6 Community composition
6.1 Taxonomy overview
6.1.1 Stacked barplot
# Strip the leading "p__" rank prefix from the phylum labels.
# The pattern is anchored to the start of the string so that only the prefix
# is removed, never a "p__" that happens to occur inside a name.
genome_metadata <- genome_metadata %>%
  mutate(phylum = str_remove(phylum, "^p__"))
# Stacked barplot of per-sample relative abundances, filled by phylum and
# faceted by region.
genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to minimum number of columns (one row per genome-sample pair)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  # grouping enables keeping the same sorting of taxonomic units
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  # plot stacked bars with white borders
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  # `scales` is spelled out in full instead of relying on partial matching
  facet_grid(~region, scales = "free", space = "free") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

6.1.2 Phylum relative abundances
# Relative abundance of each phylum in each sample: normalise the counts,
# reshape to long format, attach metadata and sum the genomes of a phylum.
phylum_summary <- genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # "drop_last" is what summarise() did implicitly; stating it silences the
  # regrouping message and leaves the result grouped by sample as before
  summarise(relabun = sum(count), .groups = "drop_last")
# Mean and standard deviation of each phylum's relative abundance across
# samples, sorted from the most to the least abundant phylum.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE), # TRUE rather than the reassignable T
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()

| phylum | mean | sd |
|---|---|---|
| Bacteroidota | 0.4035989436 | 0.145467610 |
| Fusobacteriota | 0.2524757044 | 0.116738961 |
| Bacillota_A | 0.1569779500 | 0.083006580 |
| Pseudomonadota | 0.1102730723 | 0.091046930 |
| Bacillota | 0.0325629996 | 0.047546126 |
| Bacillota_C | 0.0258155713 | 0.030445893 |
| Campylobacterota | 0.0053179332 | 0.008379673 |
| Actinomycetota | 0.0052739897 | 0.007053559 |
| Deferribacterota | 0.0040560203 | 0.006214013 |
| Bacillota_B | 0.0023438584 | 0.006878646 |
| Spirochaetota | 0.0009585895 | 0.002398975 |
| Desulfobacterota | 0.0003453677 | 0.001015241 |
# Character vector of phylum names, ordered from the highest to the lowest
# mean relative abundance across samples.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean_relabun = mean(relabun)) %>%
  arrange(desc(mean_relabun)) %>%
  pull(phylum)
# Jitter plot of per-sample relative abundance for every phylum. The factor
# levels are the reversed abundance ranking, so the most abundant phylum ends
# up at the top of the y axis.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  # presumably phylum_colors is a vector named by phylum; verify where it is defined
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  labs(x = "Relative abundance", y = "Phylum") +
  theme_minimal() +
  theme(legend.position = "none")

6.1.3 Phylum percentages by site
Daneborg dogs
| Phylum | mean | sd |
|---|---|---|
| Bacteroidota | 47.42647071 | 12.41051445 |
| Fusobacteriota | 26.44829722 | 8.85569343 |
| Bacillota_A | 11.78407839 | 5.26808838 |
| Pseudomonadota | 9.30642610 | 4.09486152 |
| Bacillota | 2.01995216 | 1.12909691 |
| Bacillota_C | 1.82946040 | 0.79964496 |
| Deferribacterota | 0.44065871 | 0.47755553 |
| Actinomycetota | 0.33862019 | 0.58878812 |
| Campylobacterota | 0.21681726 | 0.42039106 |
| Spirochaetota | 0.07390032 | 0.12704418 |
| Desulfobacterota | 0.06907354 | 0.13605743 |
| Bacillota_B | 0.04624502 | 0.09256777 |
Ittoqqortoormiit dogs
| Phylum | mean | sd |
|---|---|---|
| Bacteroidota | 33.2933180 | 13.1648465 |
| Fusobacteriota | 24.0468437 | 14.0006093 |
| Bacillota_A | 19.6115116 | 8.9878472 |
| Pseudomonadota | 12.7481884 | 12.0768015 |
| Bacillota | 4.4926478 | 6.4481701 |
| Bacillota_C | 3.3336539 | 4.1302504 |
| Campylobacterota | 0.8467694 | 1.0233386 |
| Actinomycetota | 0.7161777 | 0.7696304 |
| Bacillota_B | 0.4225267 | 0.9387879 |
| Deferribacterota | 0.3705454 | 0.7452951 |
| Spirochaetota | 0.1178176 | 0.3162563 |
6.2 Taxonomy boxplot
6.2.1 Family
# Relative abundance of each family in each sample: normalise the counts,
# reshape to long format, attach metadata and sum the genomes of a family.
family_summary <- genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to minimum number of columns (one row per genome-sample pair)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # "drop_last" is what summarise() did implicitly; stating it silences the
  # regrouping message and leaves the result grouped by sample as before
  summarise(relabun = sum(count), .groups = "drop_last")
# Mean and standard deviation of each family's relative abundance across
# samples, sorted from the most to the least abundant family.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE), # TRUE rather than the reassignable T
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean)) %>%
  tt()

| family | mean | sd |
|---|---|---|
| f__Bacteroidaceae | 3.967680e-01 | 1.500812e-01 |
| f__Fusobacteriaceae | 2.524757e-01 | 1.167390e-01 |
| f__Lachnospiraceae | 6.937163e-02 | 5.162657e-02 |
| f__Succinivibrionaceae | 4.514358e-02 | 4.599230e-02 |
| f__Burkholderiaceae_A | 3.599256e-02 | 1.658700e-02 |
| f__Ruminococcaceae | 3.000129e-02 | 2.083510e-02 |
| f__Enterobacteriaceae | 2.893626e-02 | 8.003545e-02 |
| f__Peptostreptococcaceae | 2.580817e-02 | 2.802819e-02 |
| f__Clostridiaceae | 1.944158e-02 | 3.937397e-02 |
| f__Acidaminococcaceae | 1.755676e-02 | 8.366470e-03 |
| f__Selenomonadaceae | 8.258810e-03 | 3.029587e-02 |
| f__Erysipelotrichaceae | 6.992056e-03 | 8.757741e-03 |
| f__Anaeroplasmataceae | 6.453126e-03 | 9.215863e-03 |
| f__Lactobacillaceae | 6.211658e-03 | 3.296719e-02 |
| f__Turicibacteraceae | 4.883145e-03 | 1.619002e-02 |
| f__Coriobacteriaceae | 4.215944e-03 | 6.053805e-03 |
| f__Mucispirillaceae | 4.056020e-03 | 6.214013e-03 |
| f__Helicobacteraceae | 3.509401e-03 | 5.952573e-03 |
| f__Enterococcaceae | 3.123536e-03 | 2.247890e-02 |
| f__Oscillospiraceae | 3.086101e-03 | 5.295318e-03 |
| f__Butyricicoccaceae | 2.721653e-03 | 3.113349e-03 |
| f__Muribaculaceae | 2.608256e-03 | 6.651236e-03 |
| f__Peptococcaceae | 2.343858e-03 | 6.878646e-03 |
| f__CAG-508 | 2.325013e-03 | 1.995239e-03 |
| f__Streptococcaceae | 2.313326e-03 | 7.004722e-03 |
| f__Coprobacillaceae | 2.183605e-03 | 3.762140e-03 |
| f__Anaerotignaceae | 2.105212e-03 | 3.582049e-03 |
| f__Tannerellaceae | 1.883546e-03 | 4.440356e-03 |
| f__Campylobacteraceae | 1.808532e-03 | 5.010135e-03 |
| f__Marinifilaceae | 1.500172e-03 | 5.097462e-03 |
| f__Brachyspiraceae | 9.585895e-04 | 2.398975e-03 |
| f__UBA932 | 8.389653e-04 | 2.555016e-03 |
| f__CAG-274 | 8.267501e-04 | 1.348617e-03 |
| f__Eggerthellaceae | 8.159672e-04 | 1.624225e-03 |
| f__Cellulosilyticaceae | 4.732449e-04 | 1.929303e-03 |
| f__Desulfovibrionaceae | 3.453677e-04 | 1.015241e-03 |
| f__Anaerovoracaceae | 3.247569e-04 | 7.467800e-04 |
| f__CAG-826 | 2.912959e-04 | 5.753363e-04 |
| f__Bifidobacteriaceae | 2.420788e-04 | 1.807747e-03 |
| f__Peptoniphilaceae | 2.413738e-04 | 8.768049e-04 |
| f__JAAYXM01 | 1.716132e-04 | 6.218848e-04 |
| f__Beijerinckiaceae | 1.585673e-04 | 1.207613e-03 |
| f__Mycoplasmoidaceae | 6.465891e-05 | 3.399451e-04 |
| f__Burkholderiaceae_C | 4.210337e-05 | 2.455972e-04 |
| f__ | 3.978316e-05 | 1.733718e-04 |
| f__Acutalibacteraceae | 3.977628e-05 | 2.255595e-04 |
| f__UBA3375 | 2.708135e-05 | 9.892183e-05 |
| f__Catellicoccaceae | 1.951115e-05 | 1.485925e-04 |
# Character vector of family names, ordered from the highest to the lowest
# relative abundance summed over all samples (a total, not a mean).
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total_relabun = sum(relabun)) %>%
  arrange(desc(total_relabun)) %>%
  pull(family)
# Per region
# Jitter plot of per-sample relative abundance for the 20 top-ranked families
# in family_arrange, coloured by phylum and split into one panel per region.
family_summary %>%
  left_join(
    genome_metadata %>% select(family, phylum) %>% unique(),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # head() returns at most 20 names, whereas [1:20] pads the vector with NA
  # when fewer than 20 families are present
  filter(family %in% head(family_arrange, 20)) %>%
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # NOTE(review): [-8] drops the eighth entry of phylum_colors by position;
  # presumably that phylum has no family among those plotted - confirm, and
  # prefer selecting colours by name
  scale_color_manual(values = phylum_colors[-8]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")

6.2.2 Genus
# Relative abundance of each genus (with its phylum) in each sample: normalise
# the counts, reshape to long format, attach metadata and sum the genomes of a
# genus.
genus_summary <- genome_counts_filt %>%
  # apply TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # reduce to minimum number of columns (one row per genome-sample pair)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # "drop_last" is what summarise() did implicitly; stating it silences the
  # regrouping message and leaves the result grouped by sample and phylum
  summarise(relabun = sum(count), .groups = "drop_last") %>%
  # drop the empty "g__" label (presumably genomes lacking a genus assignment)
  filter(genus != "g__") %>%
  mutate(genus = sub("^g__", "", genus)) # strip the leading rank prefix
# Mean and standard deviation of each genus's relative abundance across
# samples, sorted from the most to the least abundant genus.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE), # TRUE rather than the reassignable T
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(desc(mean))
# Render the sorted genus summary as a table.
tt(genus_summary_sort)

| genus | mean | sd |
|---|---|---|
| Phocaeicola | 2.276024e-01 | 1.030967e-01 |
| Fusobacterium_A | 1.344294e-01 | 9.447745e-02 |
| Fusobacterium_B | 9.108268e-02 | 4.487667e-02 |
| Bacteroides | 5.732156e-02 | 3.278054e-02 |
| Alloprevotella | 4.766166e-02 | 5.549928e-02 |
| Anaerobiospirillum | 4.414681e-02 | 4.592350e-02 |
| Mediterranea | 3.741307e-02 | 2.702094e-02 |
| Sutterella | 3.103683e-02 | 1.516295e-02 |
| Escherichia | 2.807037e-02 | 7.932913e-02 |
| Faecalibacterium | 2.340600e-02 | 1.733790e-02 |
| Prevotella | 2.018810e-02 | 4.308974e-02 |
| Phascolarctobacterium_A | 1.755676e-02 | 8.366470e-03 |
| Blautia | 1.618063e-02 | 1.813142e-02 |
| Peptacetobacter | 1.394900e-02 | 1.201021e-02 |
| Sarcina | 1.139524e-02 | 2.964173e-02 |
| Ruminococcus_B | 1.063530e-02 | 1.750319e-02 |
| Faecalimonas | 9.226381e-03 | 1.329828e-02 |
| Cetobacterium_A | 8.844355e-03 | 4.284187e-02 |
| Megamonas | 8.258810e-03 | 3.029587e-02 |
| Blautia_A | 7.895308e-03 | 7.135310e-03 |
| Peptostreptococcus | 7.383090e-03 | 2.411080e-02 |
| CALUXS01 | 6.453126e-03 | 9.215863e-03 |
| Schaedlerella | 5.844351e-03 | 8.304399e-03 |
| Paraprevotella | 5.729017e-03 | 1.173181e-02 |
| Turicibacter | 4.883145e-03 | 1.619002e-02 |
| Aphodousia | 4.280159e-03 | 5.412066e-03 |
| Collinsella | 4.215944e-03 | 6.053805e-03 |
| Clostridium | 4.215383e-03 | 8.849574e-03 |
| Eisenbergiella | 4.127377e-03 | 4.150112e-03 |
| Fournierella | 3.810018e-03 | 3.680947e-03 |
| Enterococcus_B | 3.058641e-03 | 2.248437e-02 |
| Mucispirillum | 3.010977e-03 | 5.847022e-03 |
| Lactobacillus | 2.930739e-03 | 1.570378e-02 |
| Ligilactobacillus | 2.808046e-03 | 1.582268e-02 |
| Butyricicoccus | 2.721653e-03 | 3.113349e-03 |
| Allobaculum | 2.644001e-03 | 2.977452e-03 |
| Ventrimonas | 2.628617e-03 | 5.341587e-03 |
| Limisoma | 2.608256e-03 | 6.651236e-03 |
| GCA-900066495 | 2.535003e-03 | 7.209322e-03 |
| Enterocloster | 2.428325e-03 | 2.738649e-03 |
| UMGS1590 | 2.343858e-03 | 6.878646e-03 |
| CAJMNU01 | 2.244379e-03 | 3.136201e-03 |
| Faecousia | 2.101022e-03 | 3.902371e-03 |
| Avimicrobium | 1.978521e-03 | 1.756834e-03 |
| Parabacteroides | 1.883546e-03 | 4.440356e-03 |
| Campylobacter_D | 1.808532e-03 | 5.010135e-03 |
| Helicobacter_A | 1.686258e-03 | 5.099817e-03 |
| Merdicola | 1.623972e-03 | 1.133414e-03 |
| Lachnospira | 1.574311e-03 | 3.698223e-03 |
| Odoribacter | 1.500172e-03 | 5.097462e-03 |
| Clostridium_H | 1.301880e-03 | 4.013538e-03 |
| Lactococcus | 1.258562e-03 | 5.625124e-03 |
| JAHHTG01 | 1.213503e-03 | 6.635896e-03 |
| Roseburia | 1.209379e-03 | 5.131904e-03 |
| Anaerotignum | 1.107741e-03 | 1.705435e-03 |
| Mediterraneibacter | 1.062241e-03 | 6.028260e-03 |
| Streptococcus | 1.054765e-03 | 4.437524e-03 |
| Holdemanella | 1.006879e-03 | 2.085413e-03 |
| Romboutsia_C | 9.837030e-04 | 6.124611e-03 |
| Brachyspira | 9.585895e-04 | 2.398975e-03 |
| Anaerobiospirillum_A | 9.429832e-04 | 2.289970e-03 |
| Fimicola | 8.751261e-04 | 3.276954e-03 |
| Klebsiella | 8.658896e-04 | 4.763963e-03 |
| Clostridium_Q | 8.577680e-04 | 1.636674e-03 |
| Phocaeicola_A | 8.521991e-04 | 2.195967e-03 |
| Cryptobacteroides | 8.389653e-04 | 2.555016e-03 |
| Slackia_A | 8.159672e-04 | 1.624225e-03 |
| Hungatella_A | 8.124222e-04 | 1.814413e-03 |
| Dwaynesavagella | 8.116830e-04 | 3.414155e-03 |
| CALVGN01 | 8.096684e-04 | 1.171787e-03 |
| Clostridium_J | 7.756410e-04 | 2.354818e-03 |
| Gallispira | 7.671477e-04 | 1.302383e-03 |
| Avilachnospira | 7.595761e-04 | 1.773406e-03 |
| Copromonas | 7.399478e-04 | 1.481977e-03 |
| CAG-269 | 7.010411e-04 | 1.856952e-03 |
| Faecalibacillus | 6.819025e-04 | 3.505549e-03 |
| Parasutterella | 6.122753e-04 | 4.662950e-03 |
| Helicobacter_G | 6.017605e-04 | 2.750447e-03 |
| Helicobacter_B | 5.971256e-04 | 2.038090e-03 |
| Faecalitalea | 5.711255e-04 | 1.222346e-03 |
| Clostridium_G | 5.351194e-04 | 1.970231e-03 |
| Helicobacter_C | 5.226296e-04 | 2.160895e-03 |
| Thomasclavelia | 5.142818e-04 | 7.308848e-04 |
| Amedibacterium | 4.891434e-04 | 2.468238e-03 |
| Limosilactobacillus | 4.728731e-04 | 1.797457e-03 |
| Romboutsia | 4.692510e-04 | 1.463961e-03 |
| Catenibacterium | 4.606070e-04 | 1.271267e-03 |
| Negativibacillus | 4.556881e-04 | 1.030846e-03 |
| UBA9414 | 4.053743e-04 | 8.124246e-04 |
| Mailhella | 3.453677e-04 | 1.015241e-03 |
| Dysosmobacter | 3.408519e-04 | 9.011948e-04 |
| Gallibacter | 3.247569e-04 | 7.467800e-04 |
| Oliverpabstia | 3.231635e-04 | 4.962950e-04 |
| Hathewaya | 3.079613e-04 | 1.086915e-03 |
| Lawsonibacter | 3.006862e-04 | 9.125541e-04 |
| Onthovivens | 2.912959e-04 | 5.753363e-04 |
| Cellulosilyticum | 2.671023e-04 | 1.579720e-03 |
| Bifidobacterium | 2.420788e-04 | 1.807747e-03 |
| Anaerosphaera | 2.413738e-04 | 8.768049e-04 |
| Fimiplasma | 2.408513e-04 | 6.143729e-04 |
| UMGS1370 | 2.210713e-04 | 4.205404e-04 |
| Zhenhengia | 2.061426e-04 | 1.157043e-03 |
| Paraclostridium | 2.019028e-04 | 5.515030e-04 |
| RGIG7332 | 1.716132e-04 | 6.218848e-04 |
| Pseudoflavonifractor_A | 1.669855e-04 | 2.623799e-04 |
| Rhodoblastus | 1.585673e-04 | 1.207613e-03 |
| Dielma | 1.490374e-04 | 2.938513e-04 |
| Beduini | 1.484189e-04 | 3.120260e-04 |
| CCUG-7971 | 1.481056e-04 | 5.489315e-04 |
| Merdivicinus | 1.397210e-04 | 4.985405e-04 |
| Terrisporobacter | 1.381169e-04 | 6.235356e-04 |
| MGBC140090 | 1.375437e-04 | 3.689981e-04 |
| Metalachnospira | 1.223447e-04 | 2.462986e-04 |
| Amedibacillus | 1.086979e-04 | 4.740354e-04 |
| Anaerofilum | 1.035896e-04 | 4.184692e-04 |
| Helicobacter_D | 1.016273e-04 | 5.427803e-04 |
| Clostridium_AH | 9.867733e-05 | 6.883540e-04 |
| Pseudoscilispira | 9.795886e-05 | 1.700112e-04 |
| JAGZHZ01 | 8.792791e-05 | 3.004922e-04 |
| Evtepia | 7.859597e-05 | 4.704903e-04 |
| RGIG3102 | 6.956484e-05 | 3.106027e-04 |
| Enterococcus | 6.489543e-05 | 3.221736e-04 |
| Mycoplasmoides | 6.465891e-05 | 3.399451e-04 |
| Duodenibacillus | 6.329115e-05 | 1.362160e-04 |
| Succinivibrio | 5.378879e-05 | 1.812214e-04 |
| Acetatifactor | 5.089177e-05 | 1.814556e-04 |
| Paenalcaligenes | 4.210337e-05 | 2.455972e-04 |
| Scybalenecus | 3.977628e-05 | 2.255595e-04 |
| Angelakisella | 2.914355e-05 | 1.019388e-04 |
| Scybalocola | 2.788638e-05 | 1.143383e-04 |
| UBA3375 | 2.708135e-05 | 9.892183e-05 |
| Merdisoma | 2.284156e-05 | 8.868611e-05 |
| Catellicoccus | 1.951115e-05 | 1.485925e-04 |
| UBA866 | 9.037031e-06 | 5.329642e-05 |
# Character vector of genus names, ordered from the highest to the lowest
# relative abundance summed over all samples (a total, not a mean).
# The "g__" filter and prefix strip repeat the cleaning already applied when
# genus_summary is built; they are kept so this chunk behaves the same.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total_relabun = sum(relabun)) %>%
  filter(genus != "g__") %>%
  arrange(desc(total_relabun)) %>%
  mutate(genus = sub("^g__", "", genus)) %>%
  pull(genus)
# Per region
# Jitter plot of per-sample relative abundance for every genus, coloured by
# phylum and split into one panel per region. The factor levels are the
# reversed genus_summary_sort ranking, so the genus with the highest mean
# abundance ends up at the top of the y axis.
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(genus = factor(genus, levels = rev(genus_summary_sort %>% pull(genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  # the y axis shows genera; the label was previously copied over as "Family"
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")